/* Open a date-stamped log file for this run */
cd $ohie
capture log close
* Build a YYMMDD stamp from today's date and use it in the log file name
global sysdate : display %tdYYNNDD date("`c(current_date)'", "DMY")
quietly log using "./logs/diff_in_diff_ohie_$sysdate.log", replace

* Reset and start timer 1 so total run time can be reported at the end
timer clear 1
timer on 1

/*----------------------------------------------------------------------*/
/* PROGRAM: ohie_sumstats.do						*/
/*									*/
/* PURPOSE:								*/
/* [*]	This code runs and bootstraps the internal and external 	*/
/*	validity tests, as well as the difference between compliers for */
/*	the OHIE 1 lottery entrants sample. The external validity test 	*/
/*	is also called the difference-in-difference test. The results	*/
/*	produced here are reported in the top panel of Table 		*/								
/*	"ohie_brfss_sumstats_new" in the paper.				*/
/*									*/
/* OUTPUT:								*/
/* [*]	diff_in_diff_ohie: This output file contains the estimate */
/*	and bootstrapped standard errors reported in the top panel of 	*/
/*	Table "ohie_brfss_sumstats_new" of the paper.			*/
/* [*] 	./logs/diff_in_diff_ohie_<sysdate>.log: The log file for this program. */
/*									*/
/*----------------------------------------------------------------------*/

* Session settings: start with no data in memory, create new variables as
* doubles, and turn off output paging (saved as a permanent preference)
clear
set type double
set more off, permanently

*************************************************
* OTHER SETUP			*
*************************************************

* Random-number seed
* This value is kept identical across all codes.
global Y_num_seed 6574358

*************************************************
* VARIABLES AND CONTROLS	  		*
*************************************************

* Input dataset
local indata "$final/oregonnumhh1.dta"

* Endogenous variable
local D "any_medicaid"

* Instrument
local Z "Z"

* Variables for which the internal and external validity tests are computed.
* Keep this list in sync with the variables in the ohie_sumstats table.
local statsvars "female age english Y_pre_bi1 Y_pre_bi2 Y_pre_bi3 Y_pre_bi4 selfhealth_fair"

* Number of bootstrap replications.
* Replication 1 is never re-sampled: it always runs on the full original
* sample, so the remaining replications are the bootstrap draws.
local reps 1001

* Seed used for each variable in the list (one global per variable)
foreach v in `statsvars' {
	global `v'_seed 6574360
}

* Output Excel file for bootstrapped statistics
local out_file "diff_in_diff_ohie"
	
*************************************************
* RUN CODE			  		*
*************************************************
	
* Main loop: for each variable in `statsvars', run `reps' replications
* (replication 1 on the original sample, the rest on bootstrap re-samples),
* store 24 statistics per replication in the matrix boot_<var>, convert the
* matrix to a dataset, and hand it to bootstrap_statistics.
foreach v of local statsvars {

	* Set the seed for each variable separately
	set seed $`v'_seed

	* Define a separate matrix for each covariate/predicted outcome/outcome 
	* as an output matrix (one row per replication)
	* NOTE(review): 27 columns are allocated but only 24 are filled below, so
	* svmat leaves three all-missing variables (boot_<var>25-27) in memory when
	* bootstrap_statistics is called -- confirm whether that is intended.
	matrix define boot_`v' = J(`reps', 27, .)	

	* Initialize the local macro for renaming variables in the matrix
	local renamevars ""
	
*************************************************
* RUN TESTS			  		*
*************************************************
	
	* Start bootstrap replications here
	forval rep = 1/`reps' {
		
		* Set up and re-sample
		use "`indata'", clear	
			
		* Initialize the iterator for matrix output
		local matrixiter = 1
		
		* Drop observations where the variable is missing
		* We do this before bootstrapping in order to ensure that each 
		* bootstrapping sample has the same sample size. In addition, 
		* this ensures that we do not oversample individuals with 
		* missing values when bootstrapping.
		drop if `v'==.				
		local obs = _N
		
		* Observations by D
		* NOTE(review): these counts are taken before re-sampling, so N_T and
		* N_U are identical in every replication -- confirm this is intended.
		qui count if `D'==1
		local N_T = `r(N)'
		
		qui count if `D'==0
		local N_U = `r(N)'
		
		* Re-sample for bootstrap (replication 1 keeps the original sample)
		qui if `rep' > 1 {
			bsample
		}
		
		* Mean for all
		qui su `v'
		local mean_all = `r(mean)'
		
		* Mean for always takers
		qui su `v' if `D'==1 & `Z'==0 
		local mean_AT = `r(mean)'
		
		* Mean for always takers and treated compliers
		qui su `v' if `D'==1 & `Z'==1
		local mean_ATTC = `r(mean)'	
		
		* Mean for never takers and untreated compliers		
		qui su `v' if `D'==0 & `Z'==0 
		local mean_NTUC = `r(mean)'
		
		* Mean for untreated individuals	
		qui su `v' if `D'==0
		local mean_UT = `r(mean)'
		
		* Mean for treated individuals
		* (fixed: this previously conditioned on D==0, which made mean_T a
		* copy of mean_UT)
		qui su `v' if `D'==1
		local mean_T = `r(mean)'
		
		* Mean for Never takers
		qui su `v' if `D'==0 & `Z'==1 
		local mean_NT = `r(mean)'
		
		* pB is defined as P(D=1|Z=0)
		qui su `D' if `Z' == 0 
		local pB = `r(mean)'
		
		* pI is defined as P(D=1|Z=1)
		qui su `D' if `Z' == 1 
		local pI = `r(mean)'
		
		* Implied number of treated and untreated compliers in the sample:
		* complier share (pI-pB) times the share with Z==1 (resp. Z==0)
		* times the sample size
		qui su `Z'
		local N_TC = (`pI'-`pB')*`r(mean)'*_N
		local N_UC = (`pI'-`pB')*(1-`r(mean)')*_N
		
		* Number of always takers and never takers
		local N_AT = _N*`pB'
		local N_NT = _N*(1-`pI')

		* Calculate the treated compliers average 
		local mean_TC = (1/(`pI' - `pB'))*(`pI'*`mean_ATTC' - `pB'*`mean_AT')

		* Calculate the untreated compliers average 
		local mean_UC = (1/(`pI' - `pB'))*((1-`pB')*`mean_NTUC' - (1-`pI')*`mean_NT')
		
		* Calculate the compliers average
		qui su `v'
		local mean_C = (1/(`pI'-`pB'))*(`r(mean)' - `pB'*`mean_AT' - (1-`pI')*`mean_NT')
		
		* Output
		* Each statistic goes into the next matrix column of row `rep'. The
		* column names are collected once (on replication 1) in `renamevars',
		* in the same order, so the svmat variables can be renamed afterwards.
		matrix boot_`v'[`rep', `matrixiter'] = `pB'
		local ++matrixiter
		if `rep'==1 local renamevars "`renamevars' pB"
		
		matrix boot_`v'[`rep', `matrixiter'] = `pI'
		local ++matrixiter
		if `rep'==1 local renamevars "`renamevars' pI"
		
		matrix boot_`v'[`rep', `matrixiter'] = `mean_all'
		local ++matrixiter
		if `rep'==1 local renamevars "`renamevars' mean_all"
		
		matrix boot_`v'[`rep', `matrixiter'] = `obs'
		local ++matrixiter
		if `rep'==1 local renamevars "`renamevars' N"
		
		matrix boot_`v'[`rep', `matrixiter'] = `N_AT'
		local ++matrixiter
		if `rep'==1 local renamevars "`renamevars' N_AT"
		
		matrix boot_`v'[`rep', `matrixiter'] = `N_NT'
		local ++matrixiter
		if `rep'==1 local renamevars "`renamevars' N_NT"
		
		matrix boot_`v'[`rep', `matrixiter'] = `N_T'
		local ++matrixiter
		if `rep'==1 local renamevars "`renamevars' N_T"
		
		matrix boot_`v'[`rep', `matrixiter'] = `N_U'
		local ++matrixiter
		if `rep'==1 local renamevars "`renamevars' N_UT"
		
		matrix boot_`v'[`rep', `matrixiter'] = `N_TC'
		local ++matrixiter
		if `rep'==1 local renamevars "`renamevars' N_TC"
		
		matrix boot_`v'[`rep', `matrixiter'] = `N_UC'
		local ++matrixiter
		if `rep'==1 local renamevars "`renamevars' N_UC"
		
		matrix boot_`v'[`rep', `matrixiter'] = `mean_AT'
		local ++matrixiter
		if `rep'==1 local renamevars "`renamevars' mean_AT"
		
		matrix boot_`v'[`rep', `matrixiter'] = `mean_C'
		local ++matrixiter
		if `rep'==1 local renamevars "`renamevars' mean_C"
		
		matrix boot_`v'[`rep', `matrixiter'] = `mean_NT'
		local ++matrixiter
		if `rep'==1 local renamevars "`renamevars' mean_NT"
		
		matrix boot_`v'[`rep', `matrixiter'] = `mean_UC'
		local ++matrixiter
		if `rep'==1 local renamevars "`renamevars' mean_UC"
		
		matrix boot_`v'[`rep', `matrixiter'] = `mean_TC'
		local ++matrixiter
		if `rep'==1 local renamevars "`renamevars' mean_TC"
		
		matrix boot_`v'[`rep', `matrixiter'] = `mean_NTUC'
		local ++matrixiter
		if `rep'==1 local renamevars "`renamevars' mean_NTUC"
		
		matrix boot_`v'[`rep', `matrixiter'] = `mean_UT'
		local ++matrixiter
		if `rep'==1 local renamevars "`renamevars' mean_UT"
		
		matrix boot_`v'[`rep', `matrixiter'] = `mean_T'
		local ++matrixiter
		if `rep'==1 local renamevars "`renamevars' mean_T"
		
		matrix boot_`v'[`rep', `matrixiter'] = `mean_AT'-`mean_C'
		local ++matrixiter
		if `rep'==1 local renamevars "`renamevars' AT_C_Diff"
		
		matrix boot_`v'[`rep', `matrixiter'] = `mean_C'-`mean_NT'
		local ++matrixiter
		if `rep'==1 local renamevars "`renamevars' C_NT_Diff"
		
		matrix boot_`v'[`rep', `matrixiter'] = `mean_AT'-`mean_NT'
		local ++matrixiter
		if `rep'==1 local renamevars "`renamevars' AT_NT_Diff"
		
		matrix boot_`v'[`rep', `matrixiter'] = `mean_TC'-`mean_UC'
		local ++matrixiter
		if `rep'==1 local renamevars "`renamevars' TC_UC_Diff"
		
		matrix boot_`v'[`rep', `matrixiter'] = `mean_AT'-`mean_TC'
		local ++matrixiter
		if `rep'==1 local renamevars "`renamevars' AT_TC_Diff"
		
		matrix boot_`v'[`rep', `matrixiter'] = `mean_UC'-`mean_NT'
		local ++matrixiter
		if `rep'==1 local renamevars "`renamevars' UC_NT_Diff"
				
	} // bootstrap loop ends here
		
	* Save the output matrix	
	* svmat adds the matrix columns as variables boot_<var>1, boot_<var>2, ...
	* to the data currently in memory (the last replication's sample)
	svmat double boot_`v'
		
	* Delete extra variables and observations from the output matrix:
	* keep only the matrix columns and the first `reps' rows
	keep boot_`v'*
		
	gen obs_num = _n
	drop if obs_num>`reps'
	drop obs_num
				
	* Rename variables: the i-th svmat variable gets the i-th collected name
	local i=1		
	foreach var of local renamevars {			
		rename boot_`v'`i' `var'
		local ++i
	}
	
	* Output the bootstrap statistics to Excel	
	* (bootstrap_statistics is defined outside this file)
	bootstrap_statistics $output `out_file' `v'
}

* Stop timer 1 and report the elapsed run time in hours
timer off 1
timer list 1
local hours = `r(t1)'/3600
display "Computing time is `hours' hours"

* Close the log; any error from closing is ignored
capture quietly log close
